In [1]:
import pandas as pd
import plotly.graph_objects as go
import json
# Load the network description. The context manager closes the file handle as
# soon as parsing is done instead of leaving it open for the garbage collector.
with open("network.json") as network_file:
    network = json.load(network_file)

node_df = pd.DataFrame(network["nodes"])
# Sorted node names; a name's position in this list is its integer node index.
nodes = sorted(node_df.name)

# Name -> position lookup built once, so each edge endpoint is resolved with a
# dict access rather than a linear scan of `nodes`. setdefault keeps the first
# position of a repeated name, which is what list.index returned.
node_position = {}
for position, name in enumerate(nodes):
    node_position.setdefault(name, position)

edge_df = pd.DataFrame(network["links"])
# An endpoint that is not a known node name raises KeyError here.
edge_df["source_index"] = edge_df.source.map(lambda name: node_position[name])
edge_df["target_index"] = edge_df.target.map(lambda name: node_position[name])

# Edges of interest
eoi = edge_df[edge_df.val >= 50]
eoi
Out[1]:
source target val source_index target_index
3 The University of Auckland The University of Auckland 61 8585 8585
18 PwC New Zealand PwC New Zealand 998 6805 6805
42 Deloitte New Zealand Deloitte New Zealand 373 2316 2316
59 PwC PwC 108 6754 6754
60 PwC New Zealand PwC 62 6805 6754
76 ASB Bank ASB Bank 66 189 189
130 KPMG New Zealand KPMG New Zealand 814 4520 4520
221 PwC PwC New Zealand 105 6754 6805
268 Deloitte New Zealand Deloitte 98 2316 2268
356 ANZ ANZ 72 153 153
418 Deloitte Deloitte 231 2268 2268
437 EY EY 848 2564 2564
627 Bank of New Zealand Bank of New Zealand 62 1022 1022
642 Fonterra Fonterra 72 3073 3073
In [2]:
# Sankey diagram of the filtered edges. Node labels come from `nodes`; each
# link names its endpoints by integer position via the *_index columns.
node_spec = dict(label=nodes)
link_spec = dict(
    source=eoi.source_index,
    target=eoi.target_index,
    value=eoi.val,
)
sankey = go.Sankey(node=node_spec, link=link_spec)

fig = go.Figure(data=[sankey])
fig.update_layout(title_text="LinkedIn workforce flows to and from KPMG, Deloitte, PwC and EY in NZ")
fig.show()
In [3]:
# Read the "Edges (Industries)" sheet of the workbook into a DataFrame
# (columns: source, target, val) and display it.
df = pd.read_excel(io="linkedin.xlsx", sheet_name="Edges (Industries)")
df
Out[3]:
source target val
0 Management Consulting Management Consulting 3064
1 Accounting Accounting 1568
2 Accounting Management Consulting 679
3 Information Technology and Services Information Technology and Services 533
4 Banking Banking 469
... ... ... ...
2749 Food & Beverages Machinery 1
2750 Building Materials Real Estate 1
2751 Renewables & Environment Utilities 1
2752 Hospital & Health Care Supermarkets 1
2753 Real Estate Banking 1

2754 rows × 3 columns

In [7]:
# Every distinct name that appears as either endpoint of a row, in sorted order.
endpoint_names = pd.concat([df["source"], df["target"]], ignore_index=True)
industries = sorted(endpoint_names.unique())
industries
Out[7]:
['Accounting',
 'Airlines/Aviation',
 'Alternative Dispute Resolution',
 'Alternative Medicine',
 'Apparel & Fashion',
 'Architecture & Planning',
 'Automotive',
 'Aviation & Aerospace',
 'Banking',
 'Biotechnology',
 'Broadcast Media',
 'Building Materials',
 'Business Supplies and Equipment',
 'Capital Markets',
 'Chemicals',
 'Civic & Social Organization',
 'Civil Engineering',
 'Commercial Real Estate',
 'Computer & Network Security',
 'Computer Games',
 'Computer Hardware',
 'Computer Networking',
 'Computer Software',
 'Construction',
 'Consumer Electronics',
 'Consumer Goods',
 'Consumer Services',
 'Cosmetics',
 'Dairy',
 'Defense & Space',
 'Design',
 'E-Learning',
 'Education Management',
 'Electrical/Electronic Manufacturing',
 'Entertainment',
 'Environmental Services',
 'Events Services',
 'Executive Office',
 'Facilities Services',
 'Farming',
 'Financial Services',
 'Fine Art',
 'Fishery',
 'Food & Beverages',
 'Food Production',
 'Fund-Raising',
 'Furniture',
 'Gambling & Casinos',
 'Glass, Ceramics & Concrete',
 'Government Administration',
 'Government Relations',
 'Graphic Design',
 'Health, Wellness and Fitness',
 'Higher Education',
 'Hospital & Health Care',
 'Hospitality',
 'Human Resources',
 'Import and Export',
 'Individual & Family Services',
 'Industrial Automation',
 'Information Services',
 'Information Technology and Services',
 'Insurance',
 'International Affairs',
 'International Trade and Development',
 'Internet',
 'Investment Banking',
 'Investment Management',
 'Law Enforcement',
 'Law Practice',
 'Legal Services',
 'Legislative Office',
 'Leisure, Travel & Tourism',
 'Logistics and Supply Chain',
 'Luxury Goods & Jewelry',
 'Machinery',
 'Management Consulting',
 'Maritime',
 'Market Research',
 'Marketing and Advertising',
 'Mechanical or Industrial Engineering',
 'Media Production',
 'Medical Devices',
 'Medical Practice',
 'Mental Health Care',
 'Military',
 'Mining & Metals',
 'Mobile Games',
 'Motion Pictures and Film',
 'Museums and Institutions',
 'Music',
 'Newspapers',
 'Nonprofit Organization Management',
 'Oil & Energy',
 'Online Media',
 'Outsourcing/Offshoring',
 'Package/Freight Delivery',
 'Packaging and Containers',
 'Paper & Forest Products',
 'Performing Arts',
 'Pharmaceuticals',
 'Philanthropy',
 'Photography',
 'Plastics',
 'Political Organization',
 'Primary/Secondary Education',
 'Printing',
 'Professional Training & Coaching',
 'Program Development',
 'Public Policy',
 'Public Relations and Communications',
 'Public Safety',
 'Publishing',
 'Real Estate',
 'Recreational Facilities and Services',
 'Religious Institutions',
 'Renewables & Environment',
 'Research',
 'Restaurants',
 'Retail',
 'Security and Investigations',
 'Semiconductors',
 'Sporting Goods',
 'Sports',
 'Staffing and Recruiting',
 'Supermarkets',
 'Telecommunications',
 'Textiles',
 'Think Tanks',
 'Tobacco',
 'Translation and Localization',
 'Transportation/Trucking/Railroad',
 'Utilities',
 'Venture Capital & Private Equity',
 'Veterinary',
 'Wholesale',
 'Wine and Spirits',
 'Wireless',
 'Writing and Editing']
In [8]:
# Industry name -> position in `industries`, built once so each row is resolved
# with a dict access instead of a linear list.index scan. setdefault keeps the
# first position if a name were ever repeated, matching list.index.
industry_position = {}
for position, industry in enumerate(industries):
    industry_position.setdefault(industry, position)

# Integer endpoints for each row; a name missing from `industries` raises KeyError.
df["source_index"] = df.source.map(lambda industry: industry_position[industry])
df["target_index"] = df.target.map(lambda industry: industry_position[industry])
df.head(50)
Out[8]:
source target val source_index target_index
0 Management Consulting Management Consulting 3064 76 76
1 Accounting Accounting 1568 0 0
2 Accounting Management Consulting 679 0 76
3 Information Technology and Services Information Technology and Services 533 61 61
4 Banking Banking 469 8 8
5 Management Consulting Accounting 409 76 0
6 Higher Education Management Consulting 315 53 76
7 Information Technology and Services Management Consulting 304 61 76
8 Government Administration Government Administration 255 49 49
9 Financial Services Financial Services 244 40 40
10 Financial Services Management Consulting 243 40 76
11 Higher Education Higher Education 219 53 53
12 Management Consulting Banking 207 76 8
13 Management Consulting Information Technology and Services 198 76 61
14 Government Administration Management Consulting 177 49 76
15 Management Consulting Financial Services 167 76 40
16 Telecommunications Telecommunications 165 126 126
17 Banking Management Consulting 163 8 76
18 Higher Education Accounting 152 53 0
19 Management Consulting Higher Education 131 76 53
20 Computer Software Computer Software 130 22 22
21 Law Practice Law Practice 120 69 69
22 Management Consulting Government Administration 119 76 49
23 Insurance Insurance 115 62 62
24 Retail Management Consulting 111 119 76
25 Financial Services Accounting 109 40 0
26 Information Technology and Services Accounting 108 61 0
27 Accounting Higher Education 100 0 53
28 Retail Retail 96 119 119
29 Utilities Utilities 91 132 132
30 Accounting Financial Services 90 0 40
31 Education Management Management Consulting 89 32 76
32 Banking Accounting 89 8 0
33 Accounting Banking 88 0 8
34 Dairy Dairy 83 28 28
35 Management Consulting Computer Software 75 76 22
36 Accounting Information Technology and Services 75 0 61
37 Law Practice Management Consulting 74 69 76
38 Government Administration Accounting 69 49 0
39 Airlines/Aviation Airlines/Aviation 63 1 1
40 Hospital & Health Care Hospital & Health Care 63 54 54
41 Nonprofit Organization Management Management Consulting 62 92 76
42 Food & Beverages Food & Beverages 62 43 43
43 Retail Accounting 60 119 0
44 Insurance Management Consulting 60 62 76
45 Financial Services Banking 60 40 8
46 Management Consulting Law Practice 60 76 69
47 Management Consulting Insurance 59 76 62
48 Telecommunications Management Consulting 55 126 76
49 Banking Financial Services 55 8 40
In [9]:
# Edges of interest: rows whose `val` is strictly greater than 100.
eoi = df.loc[df["val"].gt(100)]
In [10]:
# Sankey diagram of the industry-level edges kept in `eoi`. Node labels come
# from `industries`; each link names its endpoints by position in that list.
industry_nodes = dict(label=industries)
industry_links = dict(
    source=eoi.source_index,
    target=eoi.target_index,
    value=eoi.val,
)
fig = go.Figure(data=[go.Sankey(node=industry_nodes, link=industry_links)])

# The title says "by industry" so this figure is distinguishable from the
# organisation-level Sankey, which previously carried the identical title.
fig.update_layout(title_text="LinkedIn workforce flows to and from KPMG, Deloitte, PwC and EY in NZ, by industry")
fig.show()
In [ ]:
 
In [ ]: